First, let us replicate the table of known stopping probabilities for the following group sequential design:
Probabilities are as follows:
| Analysis | Prob stop for futility (\(\theta = 0\)) | Prob stop for efficacy (\(\theta = 0\)) | Prob stop for futility (\(\theta = 0.5\)) | Prob stop for efficacy (\(\theta = 0.5\)) |
|---|---|---|---|---|
| 1 | 0.500 | 0.006 | 0.057 | 0.179 |
| 2 | 0.299 | 0.019 | 0.042 | 0.420 |
| 3 | 0.137 | 0.038 | 0.049 | 0.253 |
First, let us simulate the straightforward case of Analysis 1 under the null.
For futility:
# Analysis 1 under the null: simulate the two-arm z-statistic 50,000 times.
# NOTE(review): theta is assigned but never passed to rnorm(); rnorm(20)
# already has mean 0, which matches the null, so the result is unaffected.
theta <- 0
sigma2 <- 1
z_vec <- c()
for (i in 1:50000) {
x1 <- rnorm(20)
x2 <- rnorm(20)
mean.diff <- mean(x1 - x2)
# z = (xbar1 - xbar2) / sqrt(2*sigma2/n), with n = 20 per arm
z <- mean.diff * sqrt( 20 / (2*sigma2) )
z_vec[i] <- z
}
# proportion of trials crossing the futility bound (z <= 0); target 0.500
mean(z_vec <= 0)
## [1] 0.50418
For efficacy:
# Re-simulate under the null; z_vec (from the previous chunk) is fully
# overwritten since the loop has the same length.
for (i in 1:50000) {
x1 <- rnorm(20)
x2 <- rnorm(20)
mean.diff <- mean(x1 - x2)
z <- mean.diff * sqrt( 20 / (2*sigma2) )
z_vec[i] <- z
}
# proportion crossing the efficacy bound (z >= 2.5); target 0.006
mean(z_vec >= 2.5)
## [1] 0.0059
Now, Analysis 1 under the alternative.
For futility:
# Analysis 1 under the alternative (theta = 0.5): treatment arm mean shifted.
theta <- 0.5
sigma2 <- 1
z_vec <- c()
for (i in 1:50000) {
x1 <- rnorm(20, mean = theta, sd = sqrt(sigma2))
x2 <- rnorm(20)
mean.diff <- mean(x1 - x2)
z <- mean.diff * sqrt( 20 / (2*sigma2) )
z_vec[i] <- z
}
# proportion stopping for futility under the alternative; target 0.057
mean(z_vec <= 0)
## [1] 0.05616
For efficacy:
# Analysis 1 under the alternative: efficacy stopping probability.
theta <- 0.5
sigma2 <- 1
z_vec <- c()
for (i in 1:50000) {
x1 <- rnorm(20, mean = theta, sd = sqrt(sigma2))
x2 <- rnorm(20)
mean.diff <- mean(x1 - x2)
z <- mean.diff * sqrt( 20 / (2*sigma2) )
z_vec[i] <- z
}
# proportion crossing the efficacy bound; target 0.179
mean(z_vec >= 2.5)
## [1] 0.18194
Now, the case of all three analyses under the null.
# All three analyses jointly under the null. Trials only proceed to the next
# stage when the current z falls inside the continuation region, so z2_vec and
# z3_vec contain ONLY continuing trials (conditional samples).
theta <- 0
sigma2 <- 1
z_vec <- c()
z2_vec <- c()
z3_vec <- c()
analysis2_count <- 0
analysis3_count <- 0
for (i in 1:100000) {
# stage 1: 20 patients per arm
x1 <- rnorm(20)
x2 <- rnorm(20)
mean.diff <- mean(x1 - x2)
z <- mean.diff * sqrt( 20 / (2*sigma2) )
z_vec[i] <- z
# continue iff 0 < z < 2.5 (between futility and efficacy bounds)
if ( (z > 0) & (z < 2.5) ) {
analysis2_count <- analysis2_count + 1
# stage 2: 20 more patients per arm (40 cumulative)
x3 <- rnorm(20)
x4 <- rnorm(20)
x1 <- c(x1, x3)
x2 <- c(x2, x4)
mean.diff <- mean(x1 - x2)
z2 <- mean.diff * sqrt( 40 / (2*sigma2) )
z2_vec[analysis2_count] <- z2
# continue iff 0.75 < z2 < 2
if ( (z2 > 0.75) & (z2 < 2) ) {
analysis3_count <- analysis3_count + 1
# stage 3: 20 more patients per arm (60 cumulative)
x5 <- rnorm(20)
x6 <- rnorm(20)
# NOTE(review): append() here vs c() elsewhere — equivalent, inconsistent style
x1 <- append(x1, x5)
x2 <- c(x2, x6)
mean.diff <- mean(x1 - x2)
z3 <- mean.diff * sqrt( 60 / (2*sigma2) )
z3_vec[analysis3_count] <- z3
}
}
}
For futility:
# P(stop for futility at analysis 1); target 0.500
mean(z_vec <= 0)
## [1] 0.50055
# P(stop at analysis 2) = P(continue past 1) * P(z2 below bound | continued);
# valid because z2_vec holds only continuing trials. Target 0.299
mean(z_vec > 0 & z_vec < 2.5) * mean(z2_vec <= 0.75)
## [1] 0.29652
# P(stop at analysis 3); target 0.137.
# NOTE(review): strict `< 1.5` here vs `<= 1.5` in the alternative-case chunk;
# no difference in probability for a continuous statistic, but inconsistent.
mean(z_vec > 0 & z_vec < 2.5) * mean(z2_vec > 0.75 & z2_vec < 2) * mean(z3_vec < 1.5)
## [1] 0.1385
For efficacy:
# P(stop for efficacy at analysis 1); target 0.006
mean(z_vec > 2.5)
## [1] 0.00653
# P(stop for efficacy at analysis 2); target 0.019.
# NOTE(review): `> 2` here vs `>= 2` in the alternative-case chunk (harmless
# for a continuous statistic, but inconsistent).
mean(z_vec > 0 & z_vec < 2.5) * mean(z2_vec > 2)
## [1] 0.01964
# P(stop for efficacy at analysis 3); target 0.038
mean(z_vec > 0 & z_vec < 2.5) * mean(z2_vec > 0.75 & z2_vec < 2) * mean(z3_vec > 1.5)
## [1] 0.03826
Now, the case of all three analyses under the alternative.
# All three analyses jointly under the alternative (theta = 0.5); same
# structure as the null-case simulation, with the treatment arm shifted.
theta <- 0.5
sigma2 <- 1
z_vec <- c()
z2_vec <- c()
z3_vec <- c()
analysis2_count <- 0
analysis3_count <- 0
for (i in 1:100000) {
# stage 1: treated arm has mean theta (sd defaults to 1 = sqrt(sigma2))
x1 <- rnorm(20, mean = theta)
x2 <- rnorm(20)
mean.diff <- mean(x1 - x2)
z <- mean.diff * sqrt( 20 / (2*sigma2) )
z_vec[i] <- z
# continue iff 0 < z < 2.5
if ( (z > 0) & (z < 2.5) ) {
analysis2_count <- analysis2_count + 1
x3 <- rnorm(20, mean = theta)
x4 <- rnorm(20)
x1 <- c(x1, x3)
x2 <- c(x2, x4)
mean.diff <- mean(x1 - x2)
z2 <- mean.diff * sqrt( 40 / (2*sigma2) )
z2_vec[analysis2_count] <- z2
# continue iff 0.75 < z2 < 2
if ( (z2 > 0.75) & (z2 < 2) ) {
analysis3_count <- analysis3_count + 1
x5 <- rnorm(20, mean = theta)
x6 <- rnorm(20)
x1 <- append(x1, x5)
x2 <- append(x2, x6)
mean.diff <- mean(x1 - x2)
z3 <- mean.diff * sqrt( 60 / (2*sigma2) )
z3_vec[analysis3_count] <- z3
}
}
}
For futility:
# P(stop for futility at analysis 1) under the alternative; target 0.057
mean(z_vec <= 0)
## [1] 0.05728
# P(stop for futility at analysis 2); target 0.042
mean(z_vec > 0 & z_vec < 2.5) * mean(z2_vec <= 0.75)
## [1] 0.04333
# P(stop for futility at analysis 3); target 0.049
mean(z_vec > 0 & z_vec < 2.5) * mean(z2_vec > 0.75 & z2_vec < 2) * mean(z3_vec <= 1.5)
## [1] 0.04795
For efficacy:
# P(stop for efficacy at analysis 1) under the alternative; target 0.179
mean(z_vec > 2.5)
## [1] 0.17826
# P(stop for efficacy at analysis 2); target 0.420
mean(z_vec > 0 & z_vec < 2.5) * mean(z2_vec >= 2)
## [1] 0.4192
# P(stop for efficacy at analysis 3); target 0.253
mean(z_vec > 0 & z_vec < 2.5) * mean(z2_vec > 0.75 & z2_vec < 2) * mean(z3_vec >= 1.5)
## [1] 0.25398
In order to generate a function, we need the following information:
The length of the upper and lower bound vectors should be equal to the number of analyses that are planned. The length of the number of patients at each analysis vector should also be equal to the number of analyses that are planned.
gsd_simulations <- function(n_analyses = 3,
                            upper_bounds = c(2.5, 2, 1.5),
                            lower_bounds = c(0, 0.75, 1.5),
                            n_patients = c(20, 40, 60),
                            null_hypothesis = 0,
                            alt_hypothesis = 0.5,
                            variance = 1) {
  # Analytic stopping probabilities for a group sequential design.
  #
  # For each analysis i, computes the probability of stopping for futility
  # (crossing the lower bound) and for efficacy (crossing the upper bound),
  # under both the null and the alternative, by integrating the joint
  # multivariate normal distribution of the interim z-statistics with
  # mvtnorm::pmvnorm (must be attached by the caller).
  #
  # Args:
  #   n_analyses:     planned number of analyses K.
  #   upper_bounds:   length-K efficacy (upper) boundaries on the z scale.
  #   lower_bounds:   length-K futility (lower) boundaries on the z scale.
  #   n_patients:     cumulative patients per arm at each analysis.
  #   null_hypothesis, alt_hypothesis: treatment effects under H0 / H1.
  #   variance:       common outcome variance sigma^2.
  # Returns:
  #   A list with one named numeric vector per analysis:
  #   (futility_null, efficacy_null, futility_alt, efficacy_alt).
  #
  # sanity checks: signal real warnings instead of print()ing text
  if (length(upper_bounds) != n_analyses)
    warning("number of upper bounds must equal number of analyses", call. = FALSE)
  if (length(lower_bounds) != n_analyses)
    warning("number of lower bounds must equal number of analyses", call. = FALSE)
  if (length(n_patients) != n_analyses)
    warning("number of patients vector must equal number of analyses", call. = FALSE)
  if (length(upper_bounds) != length(lower_bounds))
    warning("number of upper bounds must equal number of lower bounds", call. = FALSE)
  # assign values for null and alt hypotheses
  theta_0 <- null_hypothesis
  delta <- alt_hypothesis
  # K is derived from the boundary vector (as in the original implementation)
  n_analyses <- length(upper_bounds)
  # Integration limits for pmvnorm: to stop at analysis i the trial must have
  # continued (l_k < Z_k < u_k) for analyses 1..i-1 and then crossed at i.
  futility_l_bounds <- vector("list", n_analyses)
  futility_u_bounds <- vector("list", n_analyses)
  efficacy_l_bounds <- vector("list", n_analyses)
  efficacy_u_bounds <- vector("list", n_analyses)
  for (i in seq_len(n_analyses)) {
    if (i == 1) {
      # univariate case: bounds are just the first-stage boundaries
      futility_l_bounds[[i]] <- lower_bounds[i]
      futility_u_bounds[[i]] <- upper_bounds[i]
      efficacy_l_bounds[[i]] <- lower_bounds[i]
      efficacy_u_bounds[[i]] <- upper_bounds[i]
      next
    }
    # seq_len(i - 1) instead of the fragile `1:i-1` (which parses as (1:i)-1
    # and only worked because R drops index 0)
    prev <- seq_len(i - 1)
    futility_l_bounds[[i]] <- c(lower_bounds[prev], -Inf)
    futility_u_bounds[[i]] <- c(upper_bounds[prev], lower_bounds[i])
    efficacy_l_bounds[[i]] <- c(lower_bounds[prev], upper_bounds[i])
    efficacy_u_bounds[[i]] <- c(upper_bounds[prev], Inf)
  }
  # Correlation of the interim z-statistics: corr(Z_j, Z_k) = sqrt(n_j / n_k)
  # for j <= k (smaller cumulative sample size in the numerator).
  # Built with outer() rather than nested loops that shadowed the outer index.
  SIGMA_list <- vector("list", n_analyses)
  for (i in seq_len(n_analyses)) {
    if (i == 1) next
    ns <- n_patients[seq_len(i)]
    SIGMA_list[[i]] <- outer(ns, ns, function(a, b) sqrt(pmin(a, b) / pmax(a, b)))
  }
  # list of probabilities to return
  probs_to_return <- vector("list", n_analyses)
  mean_0 <- c()
  mean_1 <- c()
  for (i in seq_len(n_analyses)) {
    # drift of Z_i: theta * sqrt(n_i / (2 * sigma^2))
    mean_0[i] <- theta_0 * sqrt(n_patients[i] / (2 * variance))
    mean_1[i] <- delta * sqrt(n_patients[i] / (2 * variance))
    if (i == 1) {
      # Analysis 1: simple normal tail probabilities.
      # NOTE(review): sd = sqrt(variance) reproduces the original; a
      # z-statistic has unit sd regardless of variance — confirm intent.
      futility_null <- pnorm(futility_l_bounds[[i]],
                             mean = mean_0[i], sd = sqrt(variance))
      efficacy_null <- pnorm(efficacy_u_bounds[[i]],
                             mean = mean_0[i], sd = sqrt(variance),
                             lower.tail = FALSE)
      futility_alt <- pnorm(futility_l_bounds[[i]],
                            mean = mean_1[i], sd = sqrt(variance))
      efficacy_alt <- pnorm(efficacy_u_bounds[[i]],
                            mean = mean_1[i], sd = sqrt(variance),
                            lower.tail = FALSE)
    } else {
      # Later analyses: i-dimensional normal probabilities over the
      # continuation region(s) followed by the stopping region.
      futility_null <- pmvnorm(lower = futility_l_bounds[[i]],
                               upper = futility_u_bounds[[i]],
                               mean = mean_0, corr = SIGMA_list[[i]])
      futility_alt <- pmvnorm(lower = futility_l_bounds[[i]],
                              upper = futility_u_bounds[[i]],
                              mean = mean_1, corr = SIGMA_list[[i]])
      efficacy_null <- pmvnorm(lower = efficacy_l_bounds[[i]],
                               upper = efficacy_u_bounds[[i]],
                               mean = mean_0, corr = SIGMA_list[[i]])
      efficacy_alt <- pmvnorm(lower = efficacy_l_bounds[[i]],
                              upper = efficacy_u_bounds[[i]],
                              mean = mean_1, corr = SIGMA_list[[i]])
    }
    # c() drops pmvnorm's attributes, leaving a plain named numeric vector
    probs_to_return[[i]] <- c(futility_null, efficacy_null, futility_alt, efficacy_alt)
    names(probs_to_return[[i]]) <- c("futility_null", "efficacy_null", "futility_alt", "efficacy_alt")
  }
  # return the probabilities
  probs_to_return
}
gsd_simulations(n_analyses = 3,
upper_bounds = c(2.5, 2, 1.5),
lower_bounds = c(0, 0.75, 1.5),
n_patients = c(20, 40, 60),
null_hypothesis = 0,
alt_hypothesis = 0.5,
variance = 1)
## [[1]]
## futility_null efficacy_null futility_alt efficacy_alt
## 0.500000000 0.006209665 0.056923149 0.179084096
##
## [[2]]
## futility_null efficacy_null futility_alt efficacy_alt
## 0.29877595 0.01941484 0.04232328 0.41968371
##
## [[3]]
## futility_null efficacy_null futility_alt efficacy_alt
## 0.13729028 0.03831593 0.04903672 0.25295881
The expected sample size is calculated across a set of possible true treatment effects \(\boldsymbol{\delta} = \{\delta_1, \delta_2, \dots, \delta_n \}\) as a function of the design elements (1) number of analyses \(k = \{1, 2, \dots, K\}\), (2) number of patients at each analysis \(\mathbf{n} = \{n_1, n_2, \dots, n_K\}\), (3) the upper and lower bounds \(\mathbf{u} = (u_1, u_2, \dots, u_K)\) and \(\boldsymbol{\ell} = (\ell_1, \ell_2, \dots, \ell_K)\).
The expected sample size is:
\[ E[N \, | \, \delta]=\sum_{i=1}^K n_i P(\text{trial stops after analysis }i \, | \, \delta) \]
The probability that the trial stops after analysis \(i\) is the sum of the probabilities that the trial stops for efficacy after analysis \(i\) and the probability that the trial stops for futility after analysis \(i\).
\[ \begin{multline*} E[N \, | \, \delta]=\sum_{i=1}^K n_i \Big[ P(\text{trial stops for efficacy after analysis }i \, | \, \delta) + \\ P(\text{trial stops for futility after analysis }i \, | \, \delta) \Big] \end{multline*} \]
Starting with the design given in gsd_simulations()
function default, we can calculate the expected sample size under \(\delta=0\) to start.
# Design probabilities with the "alternative" effect set to delta = 0
probs_to_return <- gsd_simulations(alt_hypothesis = 0)
This loop then pulls the probabilities under the alternative (given above as \(\delta=0\) in the function call).
# example function inputs
# NOTE(review): `=` used for top-level assignment; file convention is `<-`
n_patients = c(20, 40, 60)
n_analyses = 3
# vector to collect the probabilities
sum_probs <- c()
for (i in 1:n_analyses) {
# pull the probabilities from the list
tmp_probs <- probs_to_return[[i]]
# gather them into a vector
# elements 3:4 are futility_alt and efficacy_alt, so their sum is
# P(trial stops at analysis i | delta)
sum_probs <- c(sum_probs, sum(tmp_probs[3:4]))
}
# calculate the expected sample size
# E[N | delta] = sum_i n_i * P(stop at analysis i | delta)
ess <- sum(n_patients * sum_probs)
We can add this loop to the function gsd_simulations()
and then we can graph the expected sample size over a vector of
alternative hypotheses \(\boldsymbol{\delta}\).
gsd_simulations <- function(n_analyses = 3,
                            upper_bounds = c(2.5, 2, 1.5),
                            lower_bounds = c(0, 0.75, 1.5),
                            n_patients = c(20, 40, 60),
                            null_hypothesis = 0,
                            alt_hypothesis = 0.5,
                            variance = 1) {
  # Analytic stopping probabilities and expected sample size for a group
  # sequential design.
  #
  # Same computation as before, plus the expected sample size under the
  # alternative: E[N | delta] = sum_i n_i * P(stop at analysis i | delta).
  # Requires mvtnorm::pmvnorm to be attached by the caller.
  #
  # Args:
  #   n_analyses:     planned number of analyses K.
  #   upper_bounds:   length-K efficacy (upper) boundaries on the z scale.
  #   lower_bounds:   length-K futility (lower) boundaries on the z scale.
  #   n_patients:     cumulative patients per arm at each analysis.
  #   null_hypothesis, alt_hypothesis: treatment effects under H0 / H1.
  #   variance:       common outcome variance sigma^2.
  # Returns:
  #   A named list: one probability vector per analysis ("analysis_1", ...)
  #   and "expected_sample_size".
  #
  # sanity checks: signal real warnings instead of print()ing text
  if (length(upper_bounds) != n_analyses)
    warning("number of upper bounds must equal number of analyses", call. = FALSE)
  if (length(lower_bounds) != n_analyses)
    warning("number of lower bounds must equal number of analyses", call. = FALSE)
  if (length(n_patients) != n_analyses)
    warning("number of patients vector must equal number of analyses", call. = FALSE)
  if (length(upper_bounds) != length(lower_bounds))
    warning("number of upper bounds must equal number of lower bounds", call. = FALSE)
  # assign values for null and alt hypotheses
  theta_0 <- null_hypothesis
  delta <- alt_hypothesis
  # K is derived from the boundary vector (as in the original implementation)
  n_analyses <- length(upper_bounds)
  # Integration limits for pmvnorm: to stop at analysis i the trial must have
  # continued (l_k < Z_k < u_k) for analyses 1..i-1 and then crossed at i.
  futility_l_bounds <- vector("list", n_analyses)
  futility_u_bounds <- vector("list", n_analyses)
  efficacy_l_bounds <- vector("list", n_analyses)
  efficacy_u_bounds <- vector("list", n_analyses)
  for (i in seq_len(n_analyses)) {
    if (i == 1) {
      futility_l_bounds[[i]] <- lower_bounds[i]
      futility_u_bounds[[i]] <- upper_bounds[i]
      efficacy_l_bounds[[i]] <- lower_bounds[i]
      efficacy_u_bounds[[i]] <- upper_bounds[i]
      next
    }
    # seq_len(i - 1) instead of the fragile `1:i-1` (parses as (1:i)-1)
    prev <- seq_len(i - 1)
    futility_l_bounds[[i]] <- c(lower_bounds[prev], -Inf)
    futility_u_bounds[[i]] <- c(upper_bounds[prev], lower_bounds[i])
    efficacy_l_bounds[[i]] <- c(lower_bounds[prev], upper_bounds[i])
    efficacy_u_bounds[[i]] <- c(upper_bounds[prev], Inf)
  }
  # Correlation of the interim z-statistics: corr(Z_j, Z_k) = sqrt(n_j / n_k),
  # j <= k; built with outer() instead of loops that shadowed the outer index.
  SIGMA_list <- vector("list", n_analyses)
  for (i in seq_len(n_analyses)) {
    if (i == 1) next
    ns <- n_patients[seq_len(i)]
    SIGMA_list[[i]] <- outer(ns, ns, function(a, b) sqrt(pmin(a, b) / pmax(a, b)))
  }
  # list of probabilities to return
  probs_to_return <- vector("list", n_analyses)
  mean_0 <- c()
  mean_1 <- c()
  for (i in seq_len(n_analyses)) {
    # drift of Z_i: theta * sqrt(n_i / (2 * sigma^2))
    mean_0[i] <- theta_0 * sqrt(n_patients[i] / (2 * variance))
    mean_1[i] <- delta * sqrt(n_patients[i] / (2 * variance))
    if (i == 1) {
      # Analysis 1: simple normal tail probabilities.
      # NOTE(review): sd = sqrt(variance) reproduces the original; a
      # z-statistic has unit sd regardless of variance — confirm intent.
      futility_null <- pnorm(futility_l_bounds[[i]],
                             mean = mean_0[i], sd = sqrt(variance))
      efficacy_null <- pnorm(efficacy_u_bounds[[i]],
                             mean = mean_0[i], sd = sqrt(variance),
                             lower.tail = FALSE)
      futility_alt <- pnorm(futility_l_bounds[[i]],
                            mean = mean_1[i], sd = sqrt(variance))
      efficacy_alt <- pnorm(efficacy_u_bounds[[i]],
                            mean = mean_1[i], sd = sqrt(variance),
                            lower.tail = FALSE)
    } else {
      # Later analyses: i-dimensional normal probabilities over the
      # continuation region(s) followed by the stopping region.
      futility_null <- pmvnorm(lower = futility_l_bounds[[i]],
                               upper = futility_u_bounds[[i]],
                               mean = mean_0, corr = SIGMA_list[[i]])
      futility_alt <- pmvnorm(lower = futility_l_bounds[[i]],
                              upper = futility_u_bounds[[i]],
                              mean = mean_1, corr = SIGMA_list[[i]])
      efficacy_null <- pmvnorm(lower = efficacy_l_bounds[[i]],
                               upper = efficacy_u_bounds[[i]],
                               mean = mean_0, corr = SIGMA_list[[i]])
      efficacy_alt <- pmvnorm(lower = efficacy_l_bounds[[i]],
                              upper = efficacy_u_bounds[[i]],
                              mean = mean_1, corr = SIGMA_list[[i]])
    }
    # c() drops pmvnorm's attributes, leaving a plain named numeric vector
    probs_to_return[[i]] <- c(futility_null, efficacy_null, futility_alt, efficacy_alt)
    names(probs_to_return[[i]]) <- c("futility_null", "efficacy_null", "futility_alt", "efficacy_alt")
  }
  # Expected sample size under the alternative: per-analysis stopping
  # probability is futility_alt + efficacy_alt (elements 3:4).
  sum_probs <- vapply(probs_to_return, function(p) sum(p[3:4]), numeric(1))
  ess <- sum(n_patients * sum_probs)
  # append the ESS as the last list element and name everything
  return_values <- c(probs_to_return, list(ess))
  names(return_values) <- c(paste0("analysis_", seq_len(n_analyses)),
                            "expected_sample_size")
  # return probabilities and ESS
  return_values
}
Now we can test the function:
gsd_simulations(alt_hypothesis = 0)
## $analysis_1
## futility_null efficacy_null futility_alt efficacy_alt
## 0.500000000 0.006209665 0.500000000 0.006209665
##
## $analysis_2
## futility_null efficacy_null futility_alt efficacy_alt
## 0.29877595 0.01941484 0.29877595 0.01941484
##
## $analysis_3
## futility_null efficacy_null futility_alt efficacy_alt
## 0.13728701 0.03831739 0.13728349 0.03831275
##
## $expected_sample_size
## [1] 33.3876
Finally, we can calculate the ESS over a range of values:
# Expected sample size across a grid of 100 true effects delta in [0, 1]
delta_vector <- seq(from = 0, to = 1, length.out = 100)
ess_vector <- vapply(
  delta_vector,
  function(d) gsd_simulations(alt_hypothesis = d)$expected_sample_size,
  numeric(1)
)
Then, we can graph this:
# Expected sample size as a function of the true effect delta
ggplot() +
geom_line(mapping = aes(x = delta_vector, y = ess_vector)) +
labs(x = "True \u03B4", y = "Expected sample size",
title = "Expected sample size over range of true \u03B4 values")
# load required libraries
library(gplite)
## This is gplite version 0.13.0
Generate some data for the function to which the model will be fit.
# Toy 1-D regression data: n equally spaced inputs on [-2, 2], targets y = x^2
n <- 5
x <- matrix(seq(-2, 2, length.out = n), nrow = n, ncol = 1)
y <- x * x
Plot the data.
# Scatter plot of the raw (x, y) training points
ggplot() +
geom_point(aes(x=x, y=y), size=2) +
xlab('x') + ylab('y')
Initialize the GP model.
# Specify the GP model we want to use:
# initial (un-optimized) hyperparameters: length-scale 0.3, magnitude 1,
# noise sd 0.5, with improper log-uniform priors on all three
gp_empty <- gp_init(
# A squared exponential (aka Gaussian aka RBF) kernel
cfs = cf_sexp(
vars = NULL,
lscale = 0.3,
magn = 1,
prior_lscale = prior_logunif(),
prior_magn = prior_logunif(),
normalize = FALSE
),
# Assume Gaussian distributed errors
lik = lik_gaussian(
sigma = 0.5,
prior_sigma = prior_logunif()
),
# Use the full covariance (i.e., do not approximate)
method = method_full()
)
In order to plot the negative log marginal likelihood for different
hyperparameter combinations, we will need to fit the GP model without
optimizing (i.e., without using gp_optim).
# Condition the GP on (x, y) at the fixed initial hyperparameters (no optimization)
gp_raw <- gp_fit(gp_empty, x, y)
Plot the model with the raw values.
# compute the predictive mean and variance in a grid of points
xt <- seq(-4, 4, len=150)
# NOTE(review): T should be spelled TRUE (T is reassignable)
pred <- gp_pred(gp_raw, xt, var = T)
# visualize: mean line with a +/- 2 sd predictive band
mu <- pred$mean
lb <- pred$mean - 2*sqrt(pred$var)
ub <- pred$mean + 2*sqrt(pred$var)
ggplot() +
geom_ribbon(aes(x=xt, ymin=lb, ymax=ub), fill='lightgray') +
geom_line(aes(x=xt, y=mu), linewidth = 0.5) +
geom_point(aes(x=x, y=y), size=2) +
xlab('x') + ylab('y')
Pull the negative log marginal likelihood.
# negative log marginal likelihood of the un-optimized fit
gp_energy(gp_raw)
## [1] 18.73287
If the GP model is optimized, the negative log marginal likelihood should improve.
# Now fit the model to the data:
# gp_optim maximizes the marginal likelihood over the hyperparameters
gp_optimized <- gp_optim(gp_empty, x, y, verbose = FALSE)
# compute the predictive mean and variance in a grid of points
xt <- seq(-4, 4, len=150)
pred <- gp_pred(gp_optimized, xt, var = T)
# visualize: mean line with a +/- 2 sd predictive band
mu <- pred$mean
lb <- pred$mean - 2*sqrt(pred$var)
ub <- pred$mean + 2*sqrt(pred$var)
ggplot() +
geom_ribbon(aes(x=xt, ymin=lb, ymax=ub), fill='lightgray') +
geom_line(aes(x=xt, y=mu), linewidth = 0.5) +
geom_point(aes(x=x, y=y), size=2) +
xlab('x') + ylab('y')
# lower (better) energy than the un-optimized fit's 18.73
gp_energy(gp_optimized)
## [1] 11.887
We can repeat the above process of fitting the raw model with different hyperparameter values. The squared-exponential covariance function has three tunable hyperparameters:
These parameters can be varied independently and collected in vectors such as \((\ell, \sigma_{f}^2, \sigma_{n}^2) = (1,1,0.1)\). In order to graph the surface of the negative log marginal likelihood, we can fix one of these parameters. In this example, we can assume there is no noise and fix the noise parameter \(\sigma_{n}^2\) at 0.01.
Create a grid of hyperparameters:
# Candidate values for the length-scale (ell) and signal magnitude (sigma_f)
ell <- seq(0.1, 4, by = 0.05)
sigma_f <- seq(0.1, 4, by = 0.05)
# every combination of the two 79-point grids; ell varies fastest
hyperparams <- expand.grid(ell = ell, sigma_f = sigma_f)
dim(hyperparams)
## [1] 6241 2
head(hyperparams)
## ell sigma_f
## 1 0.10 0.1
## 2 0.15 0.1
## 3 0.20 0.1
## 4 0.25 0.1
## 5 0.30 0.1
## 6 0.35 0.1
We can now fit the model with each of these hyperparameters and obtain the negative log marginal likelihood.
# collect energies in a preallocated vector (growing with c()/indexing past
# the end inside a 6241-iteration loop copies the vector every time)
energy <- numeric(nrow(hyperparams))
# fit one un-optimized GP per hyperparameter combination (6000+ fits)
for (i in seq_len(nrow(hyperparams))) {
gp_empty <- gp_init(
# A squared exponential (aka Gaussian aka RBF) kernel
cfs = cf_sexp(
vars = NULL,
lscale = hyperparams$ell[i],
magn = hyperparams$sigma_f[i],
prior_lscale = prior_logunif(),
prior_magn = prior_logunif(),
normalize = FALSE
),
# Assume Gaussian distributed errors; noise sd fixed small (0.01)
lik = lik_gaussian(
sigma = 0.01,
prior_sigma = prior_logunif()
),
# Use the full covariance (i.e., do not approximate)
method = method_full()
)
gp_raw <- gp_fit(gp_empty, x, y)
# negative log marginal likelihood at these fixed hyperparameters
energy[i] <- gp_energy(gp_raw)
}
head(energy)
## [1] 1676.109 1676.109 1676.106 1675.846 1673.105 1663.387
Add the energy vector to the hyperparameter vector.
# attach the energy column to the hyperparameter grid for plotting
surface_plot <- cbind(hyperparams, energy)
head(surface_plot)
## ell sigma_f energy
## 1 0.10 0.1 1676.109
## 2 0.15 0.1 1676.109
## 3 0.20 0.1 1676.106
## 4 0.25 0.1 1675.846
## 5 0.30 0.1 1673.105
## 6 0.35 0.1 1663.387
Where is the minimum value?
# grid point with the smallest negative log marginal likelihood
surface_plot[which.min(surface_plot$energy),]
## ell sigma_f energy
## 6190 1.45 4 11.07394
This is near the theoretical value that was found by
gp_optim. Now, we can plot the surface.
# contour plot of the energy surface over (ell, sigma_f)
ggplot(data = surface_plot) +
geom_contour(mapping = aes(x = ell, y = sigma_f, z = energy),
binwidth = 10)
We can also plot the surface.
Entire surface:
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Interactive 3-D surface of the whole energy grid.
# NOTE(review): 79 is hard-coded as length(unique(ell)); byrow = TRUE is
# needed because expand.grid varies ell fastest — confirm if the grids change.
fig <- plot_ly() %>%
add_trace(
data = surface_plot,
x = unique(surface_plot$ell),
y = unique(surface_plot$sigma_f),
z = matrix(surface_plot$energy, nrow = 79, ncol = 79, byrow = TRUE),
type = "surface") %>%
layout(
scene = list(
xaxis = list(
title = "length-scale parameter"
),
yaxis = list(
title = "signal variance"
),
zaxis = list(
title = "-log(marginal likelihood)"
)
)
)
fig
Surface limited to lower values of negative log marginal likelihood:
library(plotly)
# Zoomed surface: rows 50:79 (large sigma_f) by columns 1:30 (small ell),
# with the grid minimum from above marked as a 3-D point.
fig <- plot_ly() %>%
add_trace(
data = surface_plot,
x = unique(surface_plot$ell)[1:30],
y = unique(surface_plot$sigma_f)[50:79],
z = matrix(surface_plot$energy, nrow = 79, ncol = 79, byrow = TRUE)[50:79, 1:30],
type = "surface") %>%
add_trace(
# minimum found earlier: ell = 1.45, sigma_f = 4, energy = 11.07394
x = 1.45,
y = 4,
z = 11.07394,
type = "scatter3d",
mode = "markers"
) %>%
layout(
scene = list(
aspectmode = 'manual', # Set to manual for custom aspect ratio
aspectratio = list(x = 1, y = 1, z = 0.5), # Adjust x, y, z ratios
xaxis = list(
title = "length-scale parameter"
),
yaxis = list(
title = "signal variance"
),
zaxis = list(
title = "-log(marginal likelihood)"
)
)
)
fig
Looking at the above plot, it appears that values for
sigma_f that are greater than 4 may improve the negative
log marginal likelihood further. We can assess this with new GP
fits.
# Refined grid: extend sigma_f beyond 4 (where the previous minimum sat on
# the grid edge) and narrow ell around the earlier optimum.
ell <- seq(0.5, 3, by = 0.05)
sigma_f <- seq(2, 8, by = 0.05)
hyperparams <- expand.grid(
ell = ell,
sigma_f = sigma_f
)
# collect energies in a preallocated vector (avoid growing with c() in a loop)
energy <- numeric(nrow(hyperparams))
# refit the un-optimized GP over the refined grid (6000+ fits)
for (i in seq_len(nrow(hyperparams))) {
gp_empty <- gp_init(
# A squared exponential (aka Gaussian aka RBF) kernel
cfs = cf_sexp(
vars = NULL,
lscale = hyperparams$ell[i],
magn = hyperparams$sigma_f[i],
prior_lscale = prior_logunif(),
prior_magn = prior_logunif(),
normalize = FALSE
),
# Assume Gaussian distributed errors; noise sd fixed small (0.01)
lik = lik_gaussian(
sigma = 0.01,
prior_sigma = prior_logunif()
),
# Use the full covariance (i.e., do not approximate)
method = method_full()
)
gp_raw <- gp_fit(gp_empty, x, y)
energy[i] <- gp_energy(gp_raw)
}
surface_plot <- cbind(hyperparams, energy)
# grid point with the smallest negative log marginal likelihood
surface_plot[which.min(surface_plot$energy),]
## ell sigma_f energy
## 4212 1.95 6.1 11.05812
Plot this new run.
# Surface over part of the refined grid. Note the axes are swapped relative
# to the earlier plots: x = sigma_f, y = ell (titles below match).
# NOTE(review): 51 = length(ell), 121 = length(sigma_f); default byrow = FALSE
# puts ell along rows because expand.grid varies ell fastest — confirm if the
# grids change.
fig <- plot_ly() %>%
add_trace(
x = unique(surface_plot$sigma_f)[80:121],
y = unique(surface_plot$ell)[10:51],
z = matrix(surface_plot$energy, nrow = 51, ncol = 121)[10:51, 80:121],
type = "surface") %>%
add_trace(
# refined-grid minimum: sigma_f = 6.1, ell = 1.95, energy = 11.05812
x = 6.1,
y = 1.95,
z = 11.05812,
type = "scatter3d",
mode = "markers"
) %>%
layout(
scene = list(
aspectmode = 'manual', # Set to manual for custom aspect ratio
aspectratio = list(x = 1, y = 1, z = 0.5), # Adjust x, y, z ratios
xaxis = list(
title = "signal variance"
),
yaxis = list(
title = "length-scale parameter"
),
zaxis = list(
title = "-log(marginal likelihood)"
)
)
)
fig
Now that we have explored these surfaces, we can review how the selections of each of these surfaces affects the fitting of the GP regression model.
Model with \(\ell = 2\) and \(\sigma_f^2 = 6\), near the optimal values. Recall that we are setting \(\sigma_n^2 = 0.01\).
# Specify the GP model we want to use,
# we will specify the lscale and magn using values from above
# (ell = 2, magn = 6: near the refined-grid optimum)
gp_empty <- gp_init(
# A squared exponential (aka Gaussian aka RBF) kernel
cfs = cf_sexp(
vars = NULL,
lscale = 2,
magn = 6,
prior_lscale = prior_logunif(),
prior_magn = prior_logunif(),
normalize = FALSE
),
# Assume Gaussian distributed errors
lik = lik_gaussian(
sigma = 0.01,
prior_sigma = prior_logunif()
),
# Use the full covariance (i.e., do not approximate)
method = method_full()
)
gp_raw_ell2sigma6 <- gp_fit(gp_empty, x, y)
Plot the model.
# compute the predictive mean and variance in a grid of points
xt <- seq(-4, 4, len=150)
pred_ell2sigma6 <- gp_pred(gp_raw_ell2sigma6, xt, var = T)
# visualize: mean line with a +/- 2 sd predictive band
mu_ell2sigma6 <- pred_ell2sigma6$mean
lb_ell2sigma6 <- pred_ell2sigma6$mean - 2*sqrt(pred_ell2sigma6$var)
ub_ell2sigma6 <- pred_ell2sigma6$mean + 2*sqrt(pred_ell2sigma6$var)
ggplot() +
geom_ribbon(aes(x=xt, ymin=lb_ell2sigma6, ymax=ub_ell2sigma6), fill='lightgray') +
geom_line(aes(x=xt, y=mu_ell2sigma6), linewidth = 0.5) +
geom_point(aes(x=x, y=y), size=2) +
xlab('x') + ylab('y')
Model with \(\ell = 1\) and \(\sigma_f^2 = 6\):
# Specify the GP model we want to use,
# we will specify the lscale and magn using values from above
# (ell = 1: shorter length-scale than the near-optimal fit)
gp_empty <- gp_init(
# A squared exponential (aka Gaussian aka RBF) kernel
cfs = cf_sexp(
vars = NULL,
lscale = 1,
magn = 6,
prior_lscale = prior_logunif(),
prior_magn = prior_logunif(),
normalize = FALSE
),
# Assume Gaussian distributed errors
lik = lik_gaussian(
sigma = 0.01,
prior_sigma = prior_logunif()
),
# Use the full covariance (i.e., do not approximate)
method = method_full()
)
gp_raw_ell1sigma6 <- gp_fit(gp_empty, x, y)
Plot the model.
# compute the predictive mean and variance in a grid of points
pred_ell1sigma6 <- gp_pred(gp_raw_ell1sigma6, xt, var = T)
# visualize: ell = 1 fit (orange) overlaid on the near-optimal fit (gray)
mu_ell1sigma6 <- pred_ell1sigma6$mean
lb_ell1sigma6 <- pred_ell1sigma6$mean - 2*sqrt(pred_ell1sigma6$var)
ub_ell1sigma6 <- pred_ell1sigma6$mean + 2*sqrt(pred_ell1sigma6$var)
ggplot() +
geom_ribbon(aes(x=xt, ymin=lb_ell1sigma6, ymax=ub_ell1sigma6), fill='orange', alpha = 0.3) +
geom_line(aes(x=xt, y=mu_ell1sigma6), linewidth = 0.5, color = "orange") +
geom_ribbon(aes(x=xt, ymin=lb_ell2sigma6, ymax=ub_ell2sigma6), fill='lightgray', alpha = 0.6) +
geom_line(aes(x=xt, y=mu_ell2sigma6), linewidth = 0.5) +
geom_point(aes(x=x, y=y), size=2) +
xlab('x') + ylab('y')
Model with \(\ell = 0.1\) and \(\sigma_f^2 = 6\):
# Specify the GP model we want to use,
# we will specify the lscale and magn using values from above
# (ell = 0.1: very short length-scale, expect a wiggly, overfit-looking mean)
gp_empty <- gp_init(
# A squared exponential (aka Gaussian aka RBF) kernel
cfs = cf_sexp(
vars = NULL,
lscale = 0.1,
magn = 6,
prior_lscale = prior_logunif(),
prior_magn = prior_logunif(),
normalize = FALSE
),
# Assume Gaussian distributed errors
lik = lik_gaussian(
sigma = 0.01,
prior_sigma = prior_logunif()
),
# Use the full covariance (i.e., do not approximate)
method = method_full()
)
gp_raw_ell01sigma6 <- gp_fit(gp_empty, x, y)
Plot the model.
# compute the predictive mean and variance in a grid of points
pred_ell01sigma6 <- gp_pred(gp_raw_ell01sigma6, xt, var = T)
# visualize: ell = 0.1 (purple), ell = 1 (orange), near-optimal ell = 2 (gray)
mu_ell01sigma6 <- pred_ell01sigma6$mean
lb_ell01sigma6 <- pred_ell01sigma6$mean - 2*sqrt(pred_ell01sigma6$var)
ub_ell01sigma6 <- pred_ell01sigma6$mean + 2*sqrt(pred_ell01sigma6$var)
ggplot() +
geom_ribbon(aes(x=xt, ymin=lb_ell01sigma6, ymax=ub_ell01sigma6), fill='purple', alpha = 0.3) +
geom_line(aes(x=xt, y=mu_ell01sigma6), linewidth = 0.5, color = "purple") +
geom_ribbon(aes(x=xt, ymin=lb_ell1sigma6, ymax=ub_ell1sigma6), fill='orange', alpha = 0.3) +
geom_line(aes(x=xt, y=mu_ell1sigma6), linewidth = 0.5, color = "orange") +
geom_ribbon(aes(x=xt, ymin=lb_ell2sigma6, ymax=ub_ell2sigma6), fill='lightgray', alpha = 0.6) +
geom_line(aes(x=xt, y=mu_ell2sigma6), linewidth = 0.5) +
geom_point(aes(x=x, y=y), size=2) +
xlab('x') + ylab('y')
Plot the “near optimal” model.
# near-optimal fit (ell = 2, magn = 6) plotted on its own for reference
ggplot() +
geom_ribbon(aes(x=xt, ymin=lb_ell2sigma6, ymax=ub_ell2sigma6), fill='lightgray', alpha = 0.6) +
geom_line(aes(x=xt, y=mu_ell2sigma6), linewidth = 0.5) +
geom_point(aes(x=x, y=y), size=2) +
xlab('x') + ylab('y')
Model with \(\ell = 2\) and \(\sigma_f^2 = 0.1\):
# Specify the GP model we want to use,
# we will specify the lscale and magn using values from above
# (magn = 0.1: small signal magnitude, mean should shrink toward zero)
gp_empty <- gp_init(
# A squared exponential (aka Gaussian aka RBF) kernel
cfs = cf_sexp(
vars = NULL,
lscale = 2,
magn = 0.1,
prior_lscale = prior_logunif(),
prior_magn = prior_logunif(),
normalize = FALSE
),
# Assume Gaussian distributed errors
lik = lik_gaussian(
sigma = 0.01,
prior_sigma = prior_logunif()
),
# Use the full covariance (i.e., do not approximate)
method = method_full()
)
gp_raw_ell2sigma01 <- gp_fit(gp_empty, x, y)
Plot the model.
# compute the predictive mean and variance in a grid of points
pred_ell2sigma01 <- gp_pred(gp_raw_ell2sigma01, xt, var = T)
# visualize: magn = 0.1 (orange) overlaid on the near-optimal fit (gray)
mu_ell2sigma01 <- pred_ell2sigma01$mean
lb_ell2sigma01 <- pred_ell2sigma01$mean - 2*sqrt(pred_ell2sigma01$var)
ub_ell2sigma01 <- pred_ell2sigma01$mean + 2*sqrt(pred_ell2sigma01$var)
ggplot() +
# BUG FIX: ymax was lb_ell2sigma01 (the lower bound), which collapsed the
# uncertainty ribbon to a zero-height band; use the upper bound instead
geom_ribbon(aes(x=xt, ymin=lb_ell2sigma01, ymax=ub_ell2sigma01), fill='orange', alpha = 0.3) +
geom_line(aes(x=xt, y=mu_ell2sigma01), linewidth = 0.5, color = "orange") +
geom_ribbon(aes(x=xt, ymin=lb_ell2sigma6, ymax=ub_ell2sigma6), fill='lightgray', alpha = 0.6) +
geom_line(aes(x=xt, y=mu_ell2sigma6), linewidth = 0.5) +
geom_point(aes(x=x, y=y), size=2) +
xlab('x') + ylab('y')
Model with \(\ell = 2\) and \(\sigma_f^2 = 0.03\):
# Specify the GP model we want to use,
# we will specify the lscale and magn using values from above
# (magn = 0.03: even smaller signal magnitude than the previous fit)
gp_empty <- gp_init(
# A squared exponential (aka Gaussian aka RBF) kernel
cfs = cf_sexp(
vars = NULL,
lscale = 2,
magn = 0.03,
prior_lscale = prior_logunif(),
prior_magn = prior_logunif(),
normalize = FALSE
),
# Assume Gaussian distributed errors
lik = lik_gaussian(
sigma = 0.01,
prior_sigma = prior_logunif()
),
# Use the full covariance (i.e., do not approximate)
method = method_full()
)
gp_raw_ell2sigma003 <- gp_fit(gp_empty, x, y)
Plot the model.
# compute the predictive mean and variance in a grid of points
pred_ell2sigma003 <- gp_pred(gp_raw_ell2sigma003, xt, var = T)
# visualize: magn = 0.03 (purple), magn = 0.1 (orange), near-optimal (gray)
mu_ell2sigma003 <- pred_ell2sigma003$mean
lb_ell2sigma003 <- pred_ell2sigma003$mean - 2*sqrt(pred_ell2sigma003$var)
ub_ell2sigma003 <- pred_ell2sigma003$mean + 2*sqrt(pred_ell2sigma003$var)
ggplot() +
geom_ribbon(aes(x=xt, ymin=lb_ell2sigma003, ymax=ub_ell2sigma003), fill='purple', alpha = 0.3) +
geom_line(aes(x=xt, y=mu_ell2sigma003), linewidth = 0.5, color = "purple") +
# BUG FIX: ymax was lb_ell2sigma01 (the lower bound), which collapsed the
# uncertainty ribbon to a zero-height band; use the upper bound instead
geom_ribbon(aes(x=xt, ymin=lb_ell2sigma01, ymax=ub_ell2sigma01), fill='orange', alpha = 0.3) +
geom_line(aes(x=xt, y=mu_ell2sigma01), linewidth = 0.5, color = "orange") +
geom_ribbon(aes(x=xt, ymin=lb_ell2sigma6, ymax=ub_ell2sigma6), fill='lightgray', alpha = 0.6) +
geom_line(aes(x=xt, y=mu_ell2sigma6), linewidth = 0.5) +
geom_point(aes(x=x, y=y), size=2) +
xlab('x') + ylab('y')